CASE 4 - Teaching

Michael Bigler, Tania Loureiro, William Elkiess, Dakota Cuellar and Ilyana El Mendili, Liam Phan

2023-03-10

Packages

library(knitr) # html tables
library(gtsummary)
library(dplyr)
FALSE 
FALSE Attaching package: 'dplyr'
FALSE The following objects are masked from 'package:stats':
FALSE 
FALSE     filter, lag
FALSE The following objects are masked from 'package:base':
FALSE 
FALSE     intersect, setdiff, setequal, union
source(file="RFn_Plot-lmSim.R")
library(leaps)

Data Loading

teaching <- readxl::read_excel('Bteaching.xls')
teaching <- data.frame(teaching)
teaching$GROUP <- factor(teaching$GROUP, level = c('C', 'E'), labels = c('Control', 'Experiment'))
teaching$GENDER <- factor(teaching$GENDER, level = c('M', 'F'), labels =  c('Male', 'Female'))
# Don't need this teaching$TAUGHT <- factor(ifelse(teaching$PREF == 'N' | teaching$GROUP == 'Control', 'TR', teaching$PREF), level = c('TR', 'A', 'T', 'K', 'V'), labels = c('Traditional', 'Auditory', 'Tactile', 'Kinesthetic', 'Visual'))
teaching$PREF <- factor(teaching$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('Auditory', 'No Preference', 'Tactile', 'Kinesthetic', 'Visual')) # get understandable lables
teaching$SLEVEL <- factor(teaching$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))

Data exploratory

kable(summary(teaching))
GROUP SESSION PREF GENDER SLEVEL PRE POST ATT
Control :43 Length:87 Auditory :21 Male :17 Elementary:59 Min. : 5.00 Min. : 36.00 Min. :21.00
Experiment:44 Class :character No Preference:32 Female:70 Highschool:28 1st Qu.: 40.00 1st Qu.: 76.00 1st Qu.:42.50
NA Mode :character Tactile :23 NA NA Median : 55.00 Median : 80.00 Median :50.00
NA NA Kinesthetic : 4 NA NA Mean : 55.63 Mean : 80.87 Mean :48.05
NA NA Visual : 7 NA NA 3rd Qu.: 75.00 3rd Qu.: 90.00 3rd Qu.:55.00
NA NA NA NA NA Max. :100.00 Max. :100.00 Max. :60.00
teaching$index = 1:nrow(teaching)
tsibble::tsibble(teaching, index = index)
FALSE # A tsibble: 87 x 9 [1]
FALSE    GROUP   SESSION PREF          GENDER SLEVEL       PRE  POST   ATT index
FALSE    <fct>   <chr>   <fct>         <fct>  <fct>      <dbl> <dbl> <dbl> <int>
FALSE  1 Control S1      Auditory      Female Elementary     5    60    41     1
FALSE  2 Control S2      Auditory      Female Elementary    20    80    56     2
FALSE  3 Control S4      Auditory      Female Elementary    95    80    56     3
FALSE  4 Control S1      No Preference Male   Elementary     5    36    36     4
FALSE  5 Control S3      No Preference Female Elementary    90    88    40     5
FALSE  6 Control S1      Tactile       Female Highschool    85    84    50     6
FALSE  7 Control S2      Tactile       Female Elementary    45    80    58     7
FALSE  8 Control S2      Tactile       Female Highschool    65    80    56     8
FALSE  9 Control S2      No Preference Female Elementary    55    76    54     9
FALSE 10 Control S3      Tactile       Female Elementary    85    84    59    10
FALSE # ℹ 77 more rows
table1 <- tbl_summary(
  teaching,
  by = GROUP, # split table by group
  missing = "no" # don't list missing data separately
) |>
  add_n() |>  # add column with total number of non-missing observations
  add_p() |>  # test for a difference between groups
  modify_header(label = "**Variable**") |> # update the column header
  bold_labels()
FALSE Warning for variable 'PRE':
FALSE simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
FALSE Warning for variable 'POST':
FALSE simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
FALSE Warning for variable 'ATT':
FALSE simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
table1
Variable N Control, N = 431 Experiment, N = 441 p-value2
SESSION 87 >0.9
    S1 11 (26%) 11 (25%)
    S2 11 (26%) 12 (27%)
    S3 11 (26%) 11 (25%)
    S4 10 (23%) 10 (23%)
PREF 87 0.4
    Auditory 12 (28%) 9 (20%)
    No Preference 17 (40%) 15 (34%)
    Tactile 11 (26%) 12 (27%)
    Kinesthetic 2 (4.7%) 2 (4.5%)
    Visual 1 (2.3%) 6 (14%)
GENDER 87 0.4
    Male 10 (23%) 7 (16%)
    Female 33 (77%) 37 (84%)
SLEVEL 87 0.4
    Elementary 31 (72%) 28 (64%)
    Highschool 12 (28%) 16 (36%)
PRE 87 60 (45, 78) 50 (35, 75) 0.4
POST 87 80 (72, 84) 88 (80, 96) <0.001
ATT 87 49 (42, 54) 52 (45, 55) 0.2
index 87 22 (12, 32) 66 (55, 76) <0.001
1 n (%); Median (IQR)
2 Pearson's Chi-squared test; Fisher's exact test; Wilcoxon rank sum test; Wilcoxon rank sum exact test
tbl_strata <-
  teaching |>
  tbl_strata(
    strata = GROUP,
    .tbl_fun =
      ~ .x |>
        tbl_summary(by = SESSION, missing = "no") |>
      add_n() |>  # add column with total number of non-missing observations
      add_p() |>  # test for a difference between groups
      modify_header(label = "**Variable**") |> # update the column header
      bold_labels() ,
    .header = "**{strata}**, N = {n}"
  )
tbl_strata
Variable Control, N = 43 Experiment, N = 44
N S1, N = 111 S2, N = 111 S3, N = 111 S4, N = 101 p-value2 N S1, N = 111 S2, N = 121 S3, N = 111 S4, N = 101 p-value2
PREF 43 0.10 44 0.9
    Auditory 3 (27%) 6 (55%) 0 (0%) 3 (30%) 2 (18%) 2 (17%) 3 (27%) 2 (20%)
    No Preference 3 (27%) 2 (18%) 8 (73%) 4 (40%) 4 (36%) 6 (50%) 3 (27%) 2 (20%)
    Tactile 3 (27%) 3 (27%) 3 (27%) 2 (20%) 3 (27%) 2 (17%) 4 (36%) 3 (30%)
    Kinesthetic 1 (9.1%) 0 (0%) 0 (0%) 1 (10%) 0 (0%) 0 (0%) 1 (9.1%) 1 (10%)
    Visual 1 (9.1%) 0 (0%) 0 (0%) 0 (0%) 2 (18%) 2 (17%) 0 (0%) 2 (20%)
GENDER 43 0.078 44 >0.9
    Male 5 (45%) 3 (27%) 0 (0%) 2 (20%) 2 (18%) 2 (17%) 1 (9.1%) 2 (20%)
    Female 6 (55%) 8 (73%) 11 (100%) 8 (80%) 9 (82%) 10 (83%) 10 (91%) 8 (80%)
SLEVEL 43 0.046 44 0.4
    Elementary 8 (73%) 8 (73%) 5 (45%) 10 (100%) 8 (73%) 6 (50%) 6 (55%) 8 (80%)
    Highschool 3 (27%) 3 (27%) 6 (55%) 0 (0%) 3 (27%) 6 (50%) 5 (45%) 2 (20%)
PRE 43 65 (38, 75) 50 (20, 60) 70 (60, 80) 55 (46, 71) 0.3 44 40 (20, 72) 50 (32, 52) 70 (42, 78) 62 (45, 74) 0.2
POST 43 72 (64, 76) 80 (76, 80) 80 (78, 84) 80 (77, 87) 0.093 44 92 (80, 96) 88 (80, 92) 92 (82, 96) 80 (80, 90) 0.4
ATT 43 42 (38, 51) 51 (44, 55) 49 (42, 54) 47 (43, 54) 0.6 44 46 (38, 52) 48 (43, 52) 51 (46, 56) 53 (51, 56) 0.2
index 43 22 (9, 30) 23 (8, 28) 17 (12, 34) 24 (16, 34) 0.8 44 61 (56, 72) 60 (52, 74) 62 (54, 67) 79 (73, 83) 0.020
1 n (%); Median (IQR)
2 Fisher's exact test; Kruskal-Wallis rank sum test

Dummy encoding

library(fastDummies)
teaching <- dummy_columns(teaching, select_columns = c('GROUP', 'SESSION', 'PREF', 'GENDER', 'SLEVEL'), remove_selected_columns = TRUE, remove_first_dummy = TRUE)

DID (Liam, Michael & Tania)

teaching$ID <- 1:nrow(teaching)
teaching_DID <- teaching %>% slice(rep(1:n(), each = 2))
teaching_DID$POST_ind <- rep(c(0,1),nrow(teaching)) 
teaching_DID$OUTCOME <- ifelse(teaching_DID$POST_ind == 1, teaching_DID$POST, teaching_DID$PRE)
teaching_DID <- subset(teaching_DID, select = -c(POST, PRE))

DID without covariates

DID <- lm(OUTCOME ~ POST_ind + GROUP_Experiment + GROUP_Experiment*POST_ind, teaching_DID)
summary(DID)
FALSE 
FALSE Call:
FALSE lm(formula = OUTCOME ~ POST_ind + GROUP_Experiment + GROUP_Experiment * 
FALSE     POST_ind, data = teaching_DID)
FALSE 
FALSE Residuals:
FALSE     Min      1Q  Median      3Q     Max 
FALSE -52.674 -12.000   2.364  10.364  46.364 
FALSE 
FALSE Coefficients:
FALSE                           Estimate Std. Error t value             Pr(>|t|)    
FALSE (Intercept)                 57.674      2.990  19.291 < 0.0000000000000002 ***
FALSE POST_ind                    18.326      4.228   4.334             0.000025 ***
FALSE GROUP_Experiment            -4.038      4.204  -0.961               0.3381    
FALSE POST_ind:GROUP_Experiment   13.674      5.945   2.300               0.0227 *  
FALSE ---
FALSE Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE 
FALSE Residual standard error: 19.6 on 170 degrees of freedom
FALSE Multiple R-squared:  0.3153,  Adjusted R-squared:  0.3032 
FALSE F-statistic:  26.1 on 3 and 170 DF,  p-value: 0.00000000000006177
plot(DID)

plot.lmSim(DID)

There is a treatment effect but no difference in the beginning between the groups.

DID with covariates

mod <- lm(OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment * POST_ind, teaching_DID)
summary(mod)
FALSE 
FALSE Call:
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + 
FALSE     SESSION_S3 + SESSION_S4 + `PREF_No Preference` + PREF_Tactile + 
FALSE     PREF_Kinesthetic + PREF_Visual + GENDER_Female + SLEVEL_Highschool + 
FALSE     ID + POST_ind + GROUP_Experiment * POST_ind, data = teaching_DID)
FALSE 
FALSE Residuals:
FALSE     Min      1Q  Median      3Q     Max 
FALSE -45.236 -12.170   1.709  11.486  39.727 
FALSE 
FALSE Coefficients:
FALSE                           Estimate Std. Error t value   Pr(>|t|)    
FALSE (Intercept)               24.33401   10.09113   2.411    0.01703 *  
FALSE ATT                        0.58944    0.18162   3.245    0.00143 ** 
FALSE GROUP_Experiment          -5.77073    6.81210  -0.847    0.39820    
FALSE SESSION_S2                -2.06097    4.11088  -0.501    0.61682    
FALSE SESSION_S3                 6.53638    4.44417   1.471    0.14333    
FALSE SESSION_S4                 1.79008    4.58592   0.390    0.69681    
FALSE `PREF_No Preference`       4.50878    3.76880   1.196    0.23334    
FALSE PREF_Tactile               5.55874    4.04905   1.373    0.17173    
FALSE PREF_Kinesthetic           4.52404    7.40799   0.611    0.54227    
FALSE PREF_Visual               -7.16461    6.13331  -1.168    0.24450    
FALSE GENDER_Female              0.82544    3.74080   0.221    0.82564    
FALSE SLEVEL_Highschool         -2.97465    3.25297  -0.914    0.36187    
FALSE ID                         0.04342    0.12739   0.341    0.73364    
FALSE POST_ind                  18.32558    4.00856   4.572 0.00000967 ***
FALSE GROUP_Experiment:POST_ind 13.67442    5.63666   2.426    0.01639 *  
FALSE ---
FALSE Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE 
FALSE Residual standard error: 18.59 on 159 degrees of freedom
FALSE Multiple R-squared:  0.4244,  Adjusted R-squared:  0.3737 
FALSE F-statistic: 8.373 on 14 and 159 DF,  p-value: 0.000000000000251
plot(mod)

plot.lmSim(mod)

Model seems fine. We check the gam for linearity (of ATT as it is the only numerical value)

# Check gam
library(gam)
FALSE Loading required package: splines
FALSE Loading required package: foreach
FALSE Loaded gam 1.22-2
mod.gam <- gam(OUTCOME ~ lo(ATT) + GROUP_Experiment + SESSION_S2 + SESSION_S3 + SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment * POST_ind, teaching_DID, family = gaussian(link = 'identity'))
plot(mod.gam, se = T)

Gam is fine. Linearity is ok.

Selection of variables

library(regclass)
FALSE Loading required package: bestglm
FALSE Loading required package: VGAM
FALSE Loading required package: stats4
FALSE 
FALSE Attaching package: 'VGAM'
FALSE The following object is masked from 'package:gam':
FALSE 
FALSE     s
FALSE Loading required package: rpart
FALSE Loading required package: randomForest
FALSE randomForest 4.7-1.1
FALSE Type rfNews() to see new features/changes/bug fixes.
FALSE 
FALSE Attaching package: 'randomForest'
FALSE The following object is masked from 'package:dplyr':
FALSE 
FALSE     combine
FALSE Important regclass change from 1.3:
FALSE All functions that had a . in the name now have an _
FALSE all.correlations -> all_correlations, cor.demo -> cor_demo, etc.
sort(VIF(mod), decreasing = TRUE)
FALSE          GROUP_Experiment                        ID GROUP_Experiment:POST_ind 
FALSE                  5.842230                  5.154437                  3.023256 
FALSE                  POST_ind                SESSION_S3                SESSION_S4 
FALSE                  2.023256                  1.879374                  1.875224 
FALSE      `PREF_No Preference`                SESSION_S2              PREF_Tactile 
FALSE                  1.663469                  1.655288                  1.605863 
FALSE               PREF_Visual          PREF_Kinesthetic                       ATT 
FALSE                  1.401759                  1.212368                  1.202996 
FALSE         SLEVEL_Highschool             GENDER_Female 
FALSE                  1.163226                  1.108078

Shows which variables have higher importance but we don’t use this in the end

forward
mod_min <- lm(OUTCOME ~ 1, teaching_DID)
s <- step(mod_min, direction='forward', scope=formula(lm(OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment * POST_ind, teaching_DID)))
FALSE Start:  AIC=1099.43
FALSE OUTCOME ~ 1
FALSE 
FALSE                        Df Sum of Sq   RSS    AIC
FALSE + POST_ind              1   27715.0 67712 1041.7
FALSE + ATT                   1    5476.8 89950 1091.1
FALSE + SESSION_S3            1    2753.7 92673 1096.3
FALSE + PREF_Visual           1    1600.5 93826 1098.5
FALSE <none>                              95427 1099.4
FALSE + SESSION_S2            1    1029.3 94398 1099.5
FALSE + PREF_Tactile          1     940.2 94487 1099.7
FALSE + `PREF_No Preference`  1     397.5 95029 1100.7
FALSE + GENDER_Female         1     341.1 95086 1100.8
FALSE + GROUP_Experiment      1     340.8 95086 1100.8
FALSE + SESSION_S4            1     280.1 95147 1100.9
FALSE + SLEVEL_Highschool     1     258.9 95168 1101.0
FALSE + ID                    1      81.0 95346 1101.3
FALSE + PREF_Kinesthetic      1      75.3 95352 1101.3
FALSE 
FALSE Step:  AIC=1041.73
FALSE OUTCOME ~ POST_ind
FALSE 
FALSE                        Df Sum of Sq   RSS    AIC
FALSE + ATT                   1    5476.8 62235 1029.0
FALSE + SESSION_S3            1    2753.7 64958 1036.5
FALSE + PREF_Visual           1    1600.5 66111 1039.6
FALSE + SESSION_S2            1    1029.3 66683 1041.1
FALSE + PREF_Tactile          1     940.2 66772 1041.3
FALSE <none>                              67712 1041.7
FALSE + `PREF_No Preference`  1     397.5 67314 1042.7
FALSE + GENDER_Female         1     341.1 67371 1042.8
FALSE + GROUP_Experiment      1     340.8 67371 1042.8
FALSE + SESSION_S4            1     280.1 67432 1043.0
FALSE + SLEVEL_Highschool     1     258.9 67453 1043.1
FALSE + ID                    1      81.0 67631 1043.5
FALSE + PREF_Kinesthetic      1      75.3 67637 1043.5
FALSE 
FALSE Step:  AIC=1029.05
FALSE OUTCOME ~ POST_ind + ATT
FALSE 
FALSE                        Df Sum of Sq   RSS    AIC
FALSE + SESSION_S3            1   2056.06 60179 1025.2
FALSE + PREF_Visual           1   1620.50 60615 1026.5
FALSE + SESSION_S2            1   1277.32 60958 1027.5
FALSE <none>                              62235 1029.0
FALSE + PREF_Tactile          1    564.32 61671 1029.5
FALSE + `PREF_No Preference`  1    498.48 61737 1029.7
FALSE + SLEVEL_Highschool     1    205.11 62030 1030.5
FALSE + GENDER_Female         1    175.90 62059 1030.6
FALSE + PREF_Kinesthetic      1    135.49 62100 1030.7
FALSE + GROUP_Experiment      1    110.86 62124 1030.7
FALSE + ID                    1     68.79 62166 1030.9
FALSE + SESSION_S4            1     31.52 62203 1031.0
FALSE 
FALSE Step:  AIC=1025.21
FALSE OUTCOME ~ POST_ind + ATT + SESSION_S3
FALSE 
FALSE                        Df Sum of Sq   RSS    AIC
FALSE + PREF_Visual           1   1082.12 59097 1024.0
FALSE <none>                              60179 1025.2
FALSE + SLEVEL_Highschool     1    634.63 59544 1025.4
FALSE + SESSION_S4            1    496.16 59683 1025.8
FALSE + SESSION_S2            1    440.12 59739 1025.9
FALSE + PREF_Tactile          1    440.06 59739 1025.9
FALSE + `PREF_No Preference`  1    228.42 59951 1026.5
FALSE + PREF_Kinesthetic      1    132.83 60046 1026.8
FALSE + GROUP_Experiment      1    128.36 60051 1026.8
FALSE + ID                    1    112.49 60066 1026.9
FALSE + GENDER_Female         1     13.09 60166 1027.2
FALSE 
FALSE Step:  AIC=1024.05
FALSE OUTCOME ~ POST_ind + ATT + SESSION_S3 + PREF_Visual
FALSE 
FALSE                        Df Sum of Sq   RSS    AIC
FALSE <none>                              59097 1024.0
FALSE + SESSION_S2            1    515.58 58581 1024.5
FALSE + SLEVEL_Highschool     1    469.45 58627 1024.7
FALSE + SESSION_S4            1    466.08 58631 1024.7
FALSE + ID                    1    455.84 58641 1024.7
FALSE + GROUP_Experiment      1    346.41 58750 1025.0
FALSE + PREF_Tactile          1    243.19 58854 1025.3
FALSE + PREF_Kinesthetic      1     88.24 59009 1025.8
FALSE + `PREF_No Preference`  1     74.19 59023 1025.8
FALSE + GENDER_Female         1     38.99 59058 1025.9
s$call
FALSE lm(formula = OUTCOME ~ POST_ind + ATT + SESSION_S3 + PREF_Visual, 
FALSE     data = teaching_DID)
backward
s <- step(mod, direction = 'backward')
FALSE Start:  AIC=1031.33
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + 
FALSE     SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + 
FALSE     PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind + 
FALSE     GROUP_Experiment * POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - GENDER_Female              1      16.8 54947 1029.4
FALSE - ID                         1      40.1 54971 1029.5
FALSE - SESSION_S4                 1      52.6 54983 1029.5
FALSE - SESSION_S2                 1      86.8 55017 1029.6
FALSE - PREF_Kinesthetic           1     128.8 55059 1029.7
FALSE - SLEVEL_Highschool          1     288.9 55219 1030.2
FALSE - PREF_Visual                1     471.4 55402 1030.8
FALSE - `PREF_No Preference`       1     494.5 55425 1030.9
FALSE <none>                                   54930 1031.3
FALSE - PREF_Tactile               1     651.1 55582 1031.4
FALSE - SESSION_S3                 1     747.3 55678 1031.7
FALSE - GROUP_Experiment:POST_ind  1    2033.2 56964 1035.7
FALSE - ATT                        1    3638.9 58569 1040.5
FALSE 
FALSE Step:  AIC=1029.38
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + 
FALSE     SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + 
FALSE     PREF_Visual + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - ID                         1      36.0 54983 1027.5
FALSE - SESSION_S4                 1      60.6 55008 1027.6
FALSE - SESSION_S2                 1      79.5 55027 1027.6
FALSE - PREF_Kinesthetic           1     140.5 55088 1027.8
FALSE - SLEVEL_Highschool          1     290.6 55238 1028.3
FALSE - PREF_Visual                1     458.9 55406 1028.8
FALSE - `PREF_No Preference`       1     491.2 55438 1028.9
FALSE <none>                                   54947 1029.4
FALSE - PREF_Tactile               1     657.1 55604 1029.5
FALSE - SESSION_S3                 1     861.6 55809 1030.1
FALSE - GROUP_Experiment:POST_ind  1    2033.2 56980 1033.7
FALSE - ATT                        1    3632.3 58580 1038.5
FALSE 
FALSE Step:  AIC=1027.5
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + 
FALSE     SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + 
FALSE     PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - SESSION_S2                 1      74.5 55058 1025.7
FALSE - SESSION_S4                 1     109.8 55093 1025.8
FALSE - PREF_Kinesthetic           1     138.1 55121 1025.9
FALSE - SLEVEL_Highschool          1     273.4 55257 1026.4
FALSE - PREF_Visual                1     424.1 55407 1026.8
FALSE - `PREF_No Preference`       1     500.0 55483 1027.1
FALSE <none>                                   54983 1027.5
FALSE - PREF_Tactile               1     646.0 55629 1027.5
FALSE - SESSION_S3                 1     900.4 55884 1028.3
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57016 1031.8
FALSE - ATT                        1    3674.0 58657 1036.8
FALSE 
FALSE Step:  AIC=1025.73
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + SESSION_S4 + 
FALSE     `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + 
FALSE     PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - PREF_Kinesthetic           1     156.6 55214 1024.2
FALSE - SESSION_S4                 1     291.3 55349 1024.7
FALSE - SLEVEL_Highschool          1     305.7 55363 1024.7
FALSE - PREF_Visual                1     389.5 55447 1025.0
FALSE - `PREF_No Preference`       1     521.5 55579 1025.4
FALSE <none>                                   55058 1025.7
FALSE - PREF_Tactile               1     701.7 55759 1025.9
FALSE - SESSION_S3                 1    1658.3 56716 1028.9
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57091 1030.0
FALSE - ATT                        1    3624.7 58682 1034.8
FALSE 
FALSE Step:  AIC=1024.23
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + SESSION_S4 + 
FALSE     `PREF_No Preference` + PREF_Tactile + PREF_Visual + SLEVEL_Highschool + 
FALSE     POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - SESSION_S4                 1     353.5 55568 1023.3
FALSE - SLEVEL_Highschool          1     374.7 55589 1023.4
FALSE - `PREF_No Preference`       1     395.5 55610 1023.5
FALSE - PREF_Visual                1     502.8 55717 1023.8
FALSE - PREF_Tactile               1     570.5 55785 1024.0
FALSE <none>                                   55214 1024.2
FALSE - SESSION_S3                 1    1812.2 57026 1027.8
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57247 1028.5
FALSE - ATT                        1    3545.2 58759 1033.0
FALSE 
FALSE Step:  AIC=1023.34
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + `PREF_No Preference` + 
FALSE     PREF_Tactile + PREF_Visual + SLEVEL_Highschool + POST_ind + 
FALSE     GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - `PREF_No Preference`       1     366.1 55934 1022.5
FALSE - PREF_Visual                1     512.3 56080 1022.9
FALSE - PREF_Tactile               1     546.1 56114 1023.0
FALSE - SLEVEL_Highschool          1     553.2 56121 1023.1
FALSE <none>                                   55568 1023.3
FALSE - SESSION_S3                 1    1508.7 57076 1026.0
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57601 1027.6
FALSE - ATT                        1    4125.7 59693 1033.8
FALSE 
FALSE Step:  AIC=1022.48
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Tactile + 
FALSE     PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - PREF_Tactile               1     244.5 56178 1021.2
FALSE - SLEVEL_Highschool          1     566.5 56500 1022.2
FALSE <none>                                   55934 1022.5
FALSE - PREF_Visual                1     923.8 56858 1023.3
FALSE - SESSION_S3                 1    1823.6 57757 1026.1
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57967 1026.7
FALSE - ATT                        1    4129.5 60063 1032.9
FALSE 
FALSE Step:  AIC=1021.24
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual + 
FALSE     SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - SLEVEL_Highschool          1     538.9 56717 1020.9
FALSE <none>                                   56178 1021.2
FALSE - PREF_Visual                1    1138.9 57317 1022.7
FALSE - SESSION_S3                 1    1852.8 58031 1024.9
FALSE - GROUP_Experiment:POST_ind  1    2033.2 58212 1025.4
FALSE - ATT                        1    4347.3 60526 1032.2
FALSE 
FALSE Step:  AIC=1020.9
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual + 
FALSE     POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE <none>                                   56717 1020.9
FALSE - PREF_Visual                1    1300.2 58017 1022.8
FALSE - SESSION_S3                 1    1489.3 58206 1023.4
FALSE - GROUP_Experiment:POST_ind  1    2033.2 58750 1025.0
FALSE - ATT                        1    4543.4 61261 1032.3
s
FALSE 
FALSE Call:
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + 
FALSE     PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID)
FALSE 
FALSE Coefficients:
FALSE               (Intercept)                        ATT  
FALSE                   27.5544                     0.6071  
FALSE          GROUP_Experiment                 SESSION_S3  
FALSE                   -3.9340                     6.8695  
FALSE               PREF_Visual                   POST_ind  
FALSE                  -10.4343                    18.3256  
FALSE GROUP_Experiment:POST_ind  
FALSE                   13.6744
s$call
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + 
FALSE     PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID)

Selects the best model backwards which makes some variables significant.

Combination backward and forward
s <- step(mod)
FALSE Start:  AIC=1031.33
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + 
FALSE     SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + 
FALSE     PREF_Visual + GENDER_Female + SLEVEL_Highschool + ID + POST_ind + 
FALSE     GROUP_Experiment * POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - GENDER_Female              1      16.8 54947 1029.4
FALSE - ID                         1      40.1 54971 1029.5
FALSE - SESSION_S4                 1      52.6 54983 1029.5
FALSE - SESSION_S2                 1      86.8 55017 1029.6
FALSE - PREF_Kinesthetic           1     128.8 55059 1029.7
FALSE - SLEVEL_Highschool          1     288.9 55219 1030.2
FALSE - PREF_Visual                1     471.4 55402 1030.8
FALSE - `PREF_No Preference`       1     494.5 55425 1030.9
FALSE <none>                                   54930 1031.3
FALSE - PREF_Tactile               1     651.1 55582 1031.4
FALSE - SESSION_S3                 1     747.3 55678 1031.7
FALSE - GROUP_Experiment:POST_ind  1    2033.2 56964 1035.7
FALSE - ATT                        1    3638.9 58569 1040.5
FALSE 
FALSE Step:  AIC=1029.38
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + 
FALSE     SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + 
FALSE     PREF_Visual + SLEVEL_Highschool + ID + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - ID                         1      36.0 54983 1027.5
FALSE - SESSION_S4                 1      60.6 55008 1027.6
FALSE - SESSION_S2                 1      79.5 55027 1027.6
FALSE - PREF_Kinesthetic           1     140.5 55088 1027.8
FALSE - SLEVEL_Highschool          1     290.6 55238 1028.3
FALSE - PREF_Visual                1     458.9 55406 1028.8
FALSE - `PREF_No Preference`       1     491.2 55438 1028.9
FALSE <none>                                   54947 1029.4
FALSE - PREF_Tactile               1     657.1 55604 1029.5
FALSE - SESSION_S3                 1     861.6 55809 1030.1
FALSE - GROUP_Experiment:POST_ind  1    2033.2 56980 1033.7
FALSE - ATT                        1    3632.3 58580 1038.5
FALSE 
FALSE Step:  AIC=1027.5
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S2 + SESSION_S3 + 
FALSE     SESSION_S4 + `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + 
FALSE     PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - SESSION_S2                 1      74.5 55058 1025.7
FALSE - SESSION_S4                 1     109.8 55093 1025.8
FALSE - PREF_Kinesthetic           1     138.1 55121 1025.9
FALSE - SLEVEL_Highschool          1     273.4 55257 1026.4
FALSE - PREF_Visual                1     424.1 55407 1026.8
FALSE - `PREF_No Preference`       1     500.0 55483 1027.1
FALSE <none>                                   54983 1027.5
FALSE - PREF_Tactile               1     646.0 55629 1027.5
FALSE - SESSION_S3                 1     900.4 55884 1028.3
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57016 1031.8
FALSE - ATT                        1    3674.0 58657 1036.8
FALSE 
FALSE Step:  AIC=1025.73
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + SESSION_S4 + 
FALSE     `PREF_No Preference` + PREF_Tactile + PREF_Kinesthetic + 
FALSE     PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - PREF_Kinesthetic           1     156.6 55214 1024.2
FALSE - SESSION_S4                 1     291.3 55349 1024.7
FALSE - SLEVEL_Highschool          1     305.7 55363 1024.7
FALSE - PREF_Visual                1     389.5 55447 1025.0
FALSE - `PREF_No Preference`       1     521.5 55579 1025.4
FALSE <none>                                   55058 1025.7
FALSE - PREF_Tactile               1     701.7 55759 1025.9
FALSE - SESSION_S3                 1    1658.3 56716 1028.9
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57091 1030.0
FALSE - ATT                        1    3624.7 58682 1034.8
FALSE 
FALSE Step:  AIC=1024.23
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + SESSION_S4 + 
FALSE     `PREF_No Preference` + PREF_Tactile + PREF_Visual + SLEVEL_Highschool + 
FALSE     POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - SESSION_S4                 1     353.5 55568 1023.3
FALSE - SLEVEL_Highschool          1     374.7 55589 1023.4
FALSE - `PREF_No Preference`       1     395.5 55610 1023.5
FALSE - PREF_Visual                1     502.8 55717 1023.8
FALSE - PREF_Tactile               1     570.5 55785 1024.0
FALSE <none>                                   55214 1024.2
FALSE - SESSION_S3                 1    1812.2 57026 1027.8
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57247 1028.5
FALSE - ATT                        1    3545.2 58759 1033.0
FALSE 
FALSE Step:  AIC=1023.34
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + `PREF_No Preference` + 
FALSE     PREF_Tactile + PREF_Visual + SLEVEL_Highschool + POST_ind + 
FALSE     GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - `PREF_No Preference`       1     366.1 55934 1022.5
FALSE - PREF_Visual                1     512.3 56080 1022.9
FALSE - PREF_Tactile               1     546.1 56114 1023.0
FALSE - SLEVEL_Highschool          1     553.2 56121 1023.1
FALSE <none>                                   55568 1023.3
FALSE - SESSION_S3                 1    1508.7 57076 1026.0
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57601 1027.6
FALSE - ATT                        1    4125.7 59693 1033.8
FALSE 
FALSE Step:  AIC=1022.48
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Tactile + 
FALSE     PREF_Visual + SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - PREF_Tactile               1     244.5 56178 1021.2
FALSE - SLEVEL_Highschool          1     566.5 56500 1022.2
FALSE <none>                                   55934 1022.5
FALSE - PREF_Visual                1     923.8 56858 1023.3
FALSE - SESSION_S3                 1    1823.6 57757 1026.1
FALSE - GROUP_Experiment:POST_ind  1    2033.2 57967 1026.7
FALSE - ATT                        1    4129.5 60063 1032.9
FALSE 
FALSE Step:  AIC=1021.24
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual + 
FALSE     SLEVEL_Highschool + POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE - SLEVEL_Highschool          1     538.9 56717 1020.9
FALSE <none>                                   56178 1021.2
FALSE - PREF_Visual                1    1138.9 57317 1022.7
FALSE - SESSION_S3                 1    1852.8 58031 1024.9
FALSE - GROUP_Experiment:POST_ind  1    2033.2 58212 1025.4
FALSE - ATT                        1    4347.3 60526 1032.2
FALSE 
FALSE Step:  AIC=1020.9
FALSE OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual + 
FALSE     POST_ind + GROUP_Experiment:POST_ind
FALSE 
FALSE                             Df Sum of Sq   RSS    AIC
FALSE <none>                                   56717 1020.9
FALSE - PREF_Visual                1    1300.2 58017 1022.8
FALSE - SESSION_S3                 1    1489.3 58206 1023.4
FALSE - GROUP_Experiment:POST_ind  1    2033.2 58750 1025.0
FALSE - ATT                        1    4543.4 61261 1032.3
s
FALSE 
FALSE Call:
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + 
FALSE     PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID)
FALSE 
FALSE Coefficients:
FALSE               (Intercept)                        ATT  
FALSE                   27.5544                     0.6071  
FALSE          GROUP_Experiment                 SESSION_S3  
FALSE                   -3.9340                     6.8695  
FALSE               PREF_Visual                   POST_ind  
FALSE                  -10.4343                    18.3256  
FALSE GROUP_Experiment:POST_ind  
FALSE                   13.6744
s$call
FALSE lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + 
FALSE     PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID)
Model

Based on forward and backward selection as well as the combination we fit the following model

mod2 <- lm(formula = lm(formula = OUTCOME ~ ATT + GROUP_Experiment + SESSION_S3 + PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, data = teaching_DID),
    data = teaching_DID)
summary(mod2)
FALSE 
FALSE Call:
FALSE lm(formula = lm(formula = OUTCOME ~ ATT + GROUP_Experiment + 
FALSE     SESSION_S3 + PREF_Visual + POST_ind + GROUP_Experiment:POST_ind, 
FALSE     data = teaching_DID), data = teaching_DID)
FALSE 
FALSE Residuals:
FALSE     Min      1Q  Median      3Q     Max 
FALSE -47.446  -9.294   1.376  10.252  45.416 
FALSE 
FALSE Coefficients:
FALSE                           Estimate Std. Error t value   Pr(>|t|)    
FALSE (Intercept)                27.5544     8.2781   3.329   0.001074 ** 
FALSE ATT                         0.6071     0.1660   3.658   0.000341 ***
FALSE GROUP_Experiment           -3.9340     4.0090  -0.981   0.327874    
FALSE SESSION_S3                  6.8695     3.2804   2.094   0.037762 *  
FALSE PREF_Visual               -10.4343     5.3329  -1.957   0.052063 .  
FALSE POST_ind                   18.3256     3.9745   4.611 0.00000794 ***
FALSE GROUP_Experiment:POST_ind  13.6744     5.5887   2.447   0.015450 *  
FALSE ---
FALSE Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE 
FALSE Residual standard error: 18.43 on 167 degrees of freedom
FALSE Multiple R-squared:  0.4056,  Adjusted R-squared:  0.3843 
FALSE F-statistic:    19 on 6 and 167 DF,  p-value: < 0.00000000000000022
plot(mod2)

plot.lmSim(mod2)

We see there is no difference between the groups in the beginning. There is a difference between the times (post and pre). There is a treatment effect (interaction term). ATT higher better score. Session 3 seems to be better taught. It could be that visual preference has a negative effect.

Williams, Dakota, Ilyana

\[ We\ build\ a\ GLM\ model:\\ \\POST\ = GROUP + PREF + PRE \]

# Convert GROUP and PREF to factors
#data$GROUP <- factor(data$GROUP)
#data$PREF <- factor(data$PREF)
data <- read_excel("Bteaching.xls")
library(ggplot2)
FALSE 
FALSE Attaching package: 'ggplot2'
FALSE The following object is masked from 'package:randomForest':
FALSE 
FALSE     margin
data$GENDER <- ifelse(data$GENDER == "M", 0, 1)
data$GROUP <- ifelse(data$GROUP == "C", 0, 1)
data$SLEVEL <- ifelse(data$SLEVEL == "E", 0, 1)
data$SESSION <- ifelse(data$SESSION == "S1", 1, 
                        ifelse(data$SESSION == "S2", 2, 
                               ifelse(data$SESSION == "S3", 3, 
                                      ifelse(data$SESSION == "S4", 4, NA))))
data$PREF <- ifelse(data$PREF == "A", 1, 
                        ifelse(data$PREF == "K", 2, 
                               ifelse(data$PREF == "N", 3, 
                                      ifelse(data$PREF == "T", 4,
                                             ifelse(data$PREF == "V", 5, NA)))))

colnames(data)[1] <- "Group"
colnames(data)[2] <- "Session"
colnames(data)[3] <- "Preference"
colnames(data)[4] <- "Gender"
colnames(data)[5] <- "School Level"
colnames(data)[6] <- "Pre Test Score"
colnames(data)[7] <- "Post Test Score"
colnames(data)[8] <- "Attention Test Score"
require(corrplot)
FALSE Loading required package: corrplot
FALSE corrplot 0.92 loaded
#2B3433, #2F4F4E, #0D8387, #CBE4DE
#("#FF0000", "#FFFFFF", "#0000FF")
cor_mat <- cor(data[, 7], data[, -7], method = "spearman")
col <- colorRampPalette(c("#2B3433", "#FFFFFF", "#0D8387"))(25)
corrplot(cor_mat, col = col, tl.col = "black",
         tl.srt = 40, tl.cex = 1.2,
         title = "Correlation Matrix", mar = c(0, 0, 1, 0))

#I have this corrplot i want to order the correlation circles from highest to lowest correlation
# Select the 7th column of the data and compute the correlation matrix
data <- read_excel("Bteaching.xls")

data$SLEVEL <- factor(data$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
data$PREF <- factor(data$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
data$GROUP <- factor(data$GROUP)


modelGLMAAA <- glm(formula = POST ~ GROUP + PRE, data = data, family =gaussian(link = "identity"))
summary(modelGLMAAA)
FALSE 
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"), 
FALSE     data = data)
FALSE 
FALSE Deviance Residuals: 
FALSE     Min       1Q   Median       3Q      Max  
FALSE -29.993   -4.946    1.355    6.305   19.804  
FALSE 
FALSE Coefficients:
FALSE             Estimate Std. Error t value             Pr(>|t|)    
FALSE (Intercept) 65.04353    2.93762  22.142 < 0.0000000000000002 ***
FALSE GROUPE      10.40348    2.18015   4.772           0.00000762 ***
FALSE PRE          0.18997    0.04332   4.386           0.00003332 ***
FALSE ---
FALSE Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE 
FALSE (Dispersion parameter for gaussian family taken to be 102.7003)
FALSE 
FALSE     Null deviance: 12621.6  on 86  degrees of freedom
FALSE Residual deviance:  8626.8  on 84  degrees of freedom
FALSE AIC: 654.81
FALSE 
FALSE Number of Fisher Scoring iterations: 2
data <- read_excel("Bteaching.xls")
data$GENDER <- ifelse(data$GENDER == "M", 0, 1)
data$GROUP <- ifelse(data$GROUP == "C", 0, 1)
data$SESSION <- ifelse(data$SESSION == "S1", 1, 
                        ifelse(data$SESSION == "S2", 2, 
                               ifelse(data$SESSION == "S3", 3, 
                                      ifelse(data$SESSION == "S4", 4, NA))))
bn <- subset(data, PRE < 56)
bf <- subset(data, PRE > 57)
bn$SLEVEL <- factor(bn$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
bn$PREF <- factor(bn$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
bn$GROUP <- factor(bn$GROUP)


bf$SLEVEL <- factor(bf$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
bf$PREF <- factor(bf$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
bf$GROUP <- factor(bf$GROUP)

modelGLMbn <- glm(formula = POST ~  GROUP + PRE , data = bn, family =gaussian(link = "identity"))
summary(modelGLMbn)
FALSE 
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"), 
FALSE     data = bn)
FALSE 
FALSE Deviance Residuals: 
FALSE     Min       1Q   Median       3Q      Max  
FALSE -24.848   -7.213   -1.311    9.017   20.925  
FALSE 
FALSE Coefficients:
FALSE             Estimate Std. Error t value            Pr(>|t|)    
FALSE (Intercept)  59.1553     4.7677  12.408 0.00000000000000182 ***
FALSE GROUP1       14.2274     3.3787   4.211            0.000136 ***
FALSE PRE           0.3385     0.1100   3.076            0.003725 ** 
FALSE ---
FALSE Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE 
FALSE (Dispersion parameter for gaussian family taken to be 121.9993)
FALSE 
FALSE     Null deviance: 7882.2  on 43  degrees of freedom
FALSE Residual deviance: 5002.0  on 41  degrees of freedom
FALSE AIC: 341.14
FALSE 
FALSE Number of Fisher Scoring iterations: 2
modelGLMbf <- glm(formula = POST ~  GROUP + PRE , data = bf, family =gaussian(link = "identity"))
summary(modelGLMbf)
FALSE 
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"), 
FALSE     data = bf)
FALSE 
FALSE Deviance Residuals: 
FALSE     Min       1Q   Median       3Q      Max  
FALSE -20.544   -3.983    1.490    4.742   15.456  
FALSE 
FALSE Coefficients:
FALSE             Estimate Std. Error t value    Pr(>|t|)    
FALSE (Intercept)  52.0628     8.5897   6.061 0.000000388 ***
FALSE GROUP1        7.0986     2.5775   2.754     0.00881 ** 
FALSE PRE           0.3626     0.1090   3.326     0.00189 ** 
FALSE ---
FALSE Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE 
FALSE (Dispersion parameter for gaussian family taken to be 71.37899)
FALSE 
FALSE     Null deviance: 4178.6  on 42  degrees of freedom
FALSE Residual deviance: 2855.2  on 40  degrees of freedom
FALSE AIC: 310.44
FALSE 
FALSE Number of Fisher Scoring iterations: 2
data <- read_excel("Bteaching.xls")
data$GENDER <- ifelse(data$GENDER == "M", 0, 1)
data$GROUP <- ifelse(data$GROUP == "C", 0, 1)
data$SESSION <- ifelse(data$SESSION == "S1", 1, 
                        ifelse(data$SESSION == "S2", 2, 
                               ifelse(data$SESSION == "S3", 3, 
                                      ifelse(data$SESSION == "S4", 4, NA))))
bE <- subset(data, SLEVEL == "E")
bH <- subset(data, SLEVEL == "H")
bE$SLEVEL <- factor(bE$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
bE$PREF <- factor(bE$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
bE$GROUP <- factor(bE$GROUP)


bH$SLEVEL <- factor(bH$SLEVEL, level = c('E', 'H'), labels = c('Elementary', 'Highschool'))
bH$PREF <- factor(bH$PREF, level = c('A', 'N', 'T', 'K', 'V'), labels = c('A', 'N', 'T', 'K', 'V'))
bH$GROUP <- factor(bH$GROUP)

modelGLMbE <- glm(formula = POST ~  GROUP + PRE, data = bE, family =gaussian(link = "identity"))
summary(modelGLMbE)
FALSE 
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"), 
FALSE     data = bE)
FALSE 
FALSE Deviance Residuals: 
FALSE     Min       1Q   Median       3Q      Max  
FALSE -30.687   -4.667    1.333    6.425   20.731  
FALSE 
FALSE Coefficients:
FALSE             Estimate Std. Error t value             Pr(>|t|)    
FALSE (Intercept) 65.76547    3.47571  18.921 < 0.0000000000000002 ***
FALSE GROUP1       8.58190    2.71125   3.165             0.002506 ** 
FALSE PRE          0.18430    0.04968   3.710             0.000478 ***
FALSE ---
FALSE Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE 
FALSE (Dispersion parameter for gaussian family taken to be 107.8641)
FALSE 
FALSE     Null deviance: 8482.7  on 58  degrees of freedom
FALSE Residual deviance: 6040.4  on 56  degrees of freedom
FALSE AIC: 448.53
FALSE 
FALSE Number of Fisher Scoring iterations: 2
modelGLMbH<- glm(formula = POST ~  GROUP + PRE, data = bH, family =gaussian(link = "identity"))
summary(modelGLMbH)
FALSE 
FALSE Call:
FALSE glm(formula = POST ~ GROUP + PRE, family = gaussian(link = "identity"), 
FALSE     data = bH)
FALSE 
FALSE Deviance Residuals: 
FALSE      Min        1Q    Median        3Q       Max  
FALSE -21.0875   -3.9235    0.7582    6.4303   18.4303  
FALSE 
FALSE Coefficients:
FALSE             Estimate Std. Error t value       Pr(>|t|)    
FALSE (Intercept) 61.54648    5.93420  10.371 0.000000000153 ***
FALSE GROUP1      14.12158    3.76730   3.748       0.000942 ***
FALSE PRE          0.23606    0.09636   2.450       0.021639 *  
FALSE ---
FALSE Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE 
FALSE (Dispersion parameter for gaussian family taken to be 95.66149)
FALSE 
FALSE     Null deviance: 4109.7  on 27  degrees of freedom
FALSE Residual deviance: 2391.5  on 25  degrees of freedom
FALSE AIC: 211.99
FALSE 
FALSE Number of Fisher Scoring iterations: 2
data <- read_excel("Bteaching.xls")

data$GENDER <- factor(data$GENDER)
data$GROUP <- factor(data$GROUP)
data$SLEVEL <- factor(data$SLEVEL)
data$SESSION <- factor(data$SESSION)
data$PREF <- factor(data$PREF)
#data$PREF1A <- ifelse(data$PREF == "A", 1, 10)
#data$PREF2K <- ifelse(data$PREF == "K", 2, 10)
#data$PREF3N <- ifelse(data$PREF == "N", 3, 10)
#data$PREF5V <- ifelse(data$PREF == "V", 5, 10)
#data$PREF4T <- ifelse(data$PREF == "T", 4, 10)

modelGLM2 <- glm(POST ~  PRE + PREF + GENDER, data = data, family =gaussian(link = "identity"))
summary(modelGLM2)
FALSE 
FALSE Call:
FALSE glm(formula = POST ~ PRE + PREF + GENDER, family = gaussian(link = "identity"), 
FALSE     data = data)
FALSE 
FALSE Deviance Residuals: 
FALSE     Min       1Q   Median       3Q      Max  
FALSE -28.387   -7.970    1.110    7.002   28.830  
FALSE 
FALSE Coefficients:
FALSE              Estimate Std. Error t value             Pr(>|t|)    
FALSE (Intercept)  70.91293    3.50008  20.260 < 0.0000000000000002 ***
FALSE PRE           0.21085    0.05056   4.170            0.0000766 ***
FALSE PREFK       -10.14510    6.14079  -1.652               0.1024    
FALSE PREFN        -2.56565    3.16427  -0.811               0.4199    
FALSE PREFT        -0.01015    3.40394  -0.003               0.9976    
FALSE PREFV         7.74824    4.89686   1.582               0.1175    
FALSE GENDERM      -5.01453    3.01101  -1.665               0.0997 .  
FALSE ---
FALSE Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FALSE 
FALSE (Dispersion parameter for gaussian family taken to be 121.7772)
FALSE 
FALSE     Null deviance: 12621.6  on 86  degrees of freedom
FALSE Residual deviance:  9742.2  on 80  degrees of freedom
FALSE AIC: 673.39
FALSE 
FALSE Number of Fisher Scoring iterations: 2
plot(modelGLM2)

library(ggplot2)
btc <- subset(BB, GROUP == "C")
bte <- subset(BB, GROUP == "E")

# Combine the data into a data frame
dataCE <- data.frame(group = c(rep("Control", length(btc$POST)), rep("Experiment", length(bte$POST))), value = c(btc$POST, bte$POST))

# Create the box plot graph
ggplot(dataCE, aes(x = group, y = value, fill = group)) + geom_boxplot() + theme_classic()

# Combine the data into a data frame
dataCE <- data.frame(group = c(rep("Control", length(btc$POST)), rep("Experiment", length(bte$POST))), value = c(btc$POST, bte$POST))


# Data frame for each preference for the control group
bc <- subset(BB, GROUP == "C")

bta <- subset(bc, PREF == "A")
btk <- subset(bc, PREF == "K")
btn <- subset(bc, PREF == "N")
btt <- subset(bc, PREF == "T")
btv <- subset(bc, PREF == "V")


dataAKNTV <- data.frame(group = c(rep("Auditory", length(bta$POST)), rep("Kinesthetic",length(btk$POST)), rep("No preference", length(btn$POST)), rep("Tactile", length(btt$POST)), rep("Visual", length(btv$POST))), value = c(bta$POST, btk$POST, btn$POST, btt$POST, btv$POST))

ggplot(dataAKNTV, aes(x = group, y = value, fill = group)) + geom_boxplot() + theme_classic()

# Data frame for each preference for the experiment group
be <- subset(BB, GROUP == "E")

bta <- subset(be, PREF == "A")
btk <- subset(be, PREF == "K")
btn <- subset(be, PREF == "N")
btt <- subset(be, PREF == "T")
btv <- subset(be, PREF == "V")



dataAKNTV <- data.frame(group = c(rep("Auditory", length(bta$POST)), rep("Kinesthetic",length(btk$POST)), rep("No preference", length(btn$POST)), rep("Tactile", length(btt$POST)), rep("Visual", length(btv$POST))), value = c(bta$POST, btk$POST, btn$POST, btt$POST, btv$POST))

ggplot(dataAKNTV, aes(x = group, y = value, fill = group)) + geom_boxplot() + theme_classic()

btm <- subset(BB, GENDER == "M")
btf <- subset(BB, GENDER == "F")

# Combine the data into a data frame
dataMF <- data.frame(group = c(rep("Male", length(btm$POST)), rep("Female", length(btf$POST))), value = c(btm$POST, btf$POST))

# Create the box plot graph
ggplot(dataMF, aes(x = group, y = value, fill = group)) + geom_boxplot() + theme_classic()

mean(btf$POST)-mean(btm$POST)
FALSE [1] 5.179832
mean(btf$PRE)-mean(btm$PRE)
FALSE [1] 1.882353
mean(btm$POST)
FALSE [1] 76.70588

We have a dataset of 87 students, with 8 variables. (qualitative and quatitative) – GROUP: C = ‘Control’ (i.e. traditional teaching methods), E = ‘Experiment’ (i.e. incorporated learning styles); – SESSION: students were randomly assigned to 1 of 4 sessions (denoted by S1 to S4) within each GROUP; – PREF: students’ learning style preference (T = ‘Tactile’, K = ‘Kinesthetic’, A = ‘Auditory’, V = ‘Visual’, N = ‘No preference’); – GENDER: M = male, F = female; – SLEVEL: school level (E = ‘Elementary, H = ’High School’); – PRE: pre-test score (out of 100); – POST: post-test score (out of 100); – ATT: attitude scale score (out of 60).

We want to know if the students in the experiment group who were able to choose a PREF scored higher in a POST test than the students who were in a control group. PREF is the response variable. Build a model in R to answer the question. The client wants you to investigate his research hypothesis that students in training sessions that utilise a processing activity that matches the students’ perceptual learning style preferences will demonstrate greater long-term retention of content than students in a traditional setting that has not utilised that processing activity. Build a model in R considering t

data <- read_excel("Bteaching.xls")
# Create a new variable for matching learning style preference
data$MATCH <- ifelse(data$PREF == "N", "ALL", data$PREF)

# Split the data into Control and Experiment groups
control_data <- subset(data, GROUP == "C")
experiment_data <- subset(data, GROUP == "E")

# Calculate the mean post-test scores for each group and learning style preference
control_means <- aggregate(control_data$POST, by = list(control_data$MATCH), FUN = mean)
experiment_means <- aggregate(experiment_data$POST, by = list(experiment_data$MATCH), FUN = mean)